import scrapy
from scrapy.loader import ItemLoader
from guoxuedashi.items import GuoxuedashiItem, GuoxuedashiInfoItem


class GuoxuedashiSpider(scrapy.Spider):
    """Crawl the guoxuedashi.net ``/a/18c/`` listing and scrape linked articles.

    The listing page is parsed for article links (``parse``); every linked
    page is then turned into a ``GuoxuedashiInfoItem`` (``parse_article``).
    """

    name = "guoxuedashi"

    def start_requests(self):
        """Yield the initial request(s) for the listing page(s)."""
        urls = [
            "https://www.guoxuedashi.net/a/18c/",
        ]
        for url in urls:
            yield scrapy.Request(url=url, callback=self.parse)

    def parse(self, response):
        """Yield one request per article link found in the listing.

        Args:
            response: The downloaded listing page.

        Yields:
            ``scrapy.Request`` objects whose callback is ``parse_article``.
        """
        # NOTE(review): this absolute XPath is tied to the current page layout
        # and will silently match nothing if the site markup changes.
        for dd_item in response.xpath("//html[1]/body[1]/div[3]/div[1]/div[3]/div[1]/dl[1]/dd"):
            link_url = dd_item.xpath("a/@href").get()
            if not link_url:
                # <dd> without a link (or an <a> without href): nothing to follow.
                continue
            # urljoin resolves site-relative, page-relative and absolute hrefs
            # against the page URL instead of assuming a leading "/".
            yield scrapy.Request(
                url=response.urljoin(link_url),
                callback=self.parse_article,
            )

    def parse_article(self, response):
        """Extract a ``GuoxuedashiInfoItem`` from an article page.

        The page heading is stored under ``directory``; the first non-empty
        text node of the article body (with "◎" removed) is stored under
        ``title`` and the remaining non-empty text nodes under ``content``.

        Args:
            response: The downloaded article page.

        Yields:
            The loaded item, or nothing when the page has no body text.
        """
        loader = ItemLoader(item=GuoxuedashiInfoItem(), response=response)

        title = response.xpath('//*[@id="ArtContent"]/h1[1]/text()').get()
        self.logger.info("====================================\n")
        self.logger.info(title)
        loader.add_value('directory', title)

        # Keep only text nodes that still have content after trimming.
        stripped = (
            text.strip()
            for text in response.xpath('//*[@id="infozj_txt"]/text()').getall()
        )
        articles = [text for text in stripped if text]
        if not articles:
            # Without any body text there is no title/content to extract;
            # skip the page instead of failing on an empty list.
            self.logger.warning("No article text found at %s", response.url)
            return

        heading, *paragraphs = articles
        loader.add_value('title', heading.replace("◎", ""))
        loader.add_value('content', paragraphs)
        yield loader.load_item()